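# Get the fileid and hasfulltext fields (translated from the original note).
# The code below routes each line of a tab-separated file into one of two
# output files, depending on whether its second field ends with '11'.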


def handle(in_file, out_file, out_file2):
    """Split the lines of a tab-separated file into two output files.

    Each input line is split on tab characters. Lines whose second field
    ends with ``'11'`` are written to ``out_file``; every other line that
    has a second field is written to ``out_file2``. Lines are written with
    surrounding whitespace stripped and a single trailing newline.

    Lines without a second field (blank lines, lines containing no tab)
    cannot be classified; they are skipped and their number is reported
    at the end instead of aborting the run half-way through.

    Args:
        in_file: Path of the UTF-8 encoded input file.
        out_file: Path that receives the lines whose second field ends
            with ``'11'``. Overwritten if it exists.
        out_file2: Path that receives the remaining classifiable lines.
            Overwritten if it exists.
    """
    count = 0
    skipped = 0
    # Context managers guarantee all three files are flushed and closed,
    # even if an error occurs while processing.
    with open(in_file, 'r', encoding='utf-8') as fp, \
            open(out_file, 'w', encoding='utf-8') as fout, \
            open(out_file2, 'w', encoding='utf-8') as fout2:
        # Stream the input line by line instead of loading it all at once.
        for line in fp:
            count += 1
            if count % 100000 == 0:
                print("执行到了第%d条" % count)
            # Drop the line terminator before splitting; otherwise a second
            # field that is also the last field ends with "\n" and can
            # never match the '11' suffix.
            fields = line.rstrip("\n").split("\t")
            if len(fields) < 2:
                skipped += 1
                continue
            target = fout if fields[1].endswith('11') else fout2
            target.write(line.strip() + "\n")
    print("总数：", count)
    if skipped:
        print("skipped lines without a second field:", skipped)


if __name__ == "__main__":
    # Script entry point: run the split on fixed, hard-coded local paths.
    handle(
        'D:/Testtext/valid_docid.txt',        # input file
        'D:/Testtext/valid_docid_zh_cn.txt',  # first output file
        'D:/Testtext/valid_docid_en.txt',     # second output file
    )
